###################################################################################################################################
### Script of the manuscript:																					                                                          ###
### Marin-Diaz B, et al - The importance of marshes on soil stabilization in case of a dike breach                              ###
### Date created: 13/10/20 Groningen                                                                                            ###
### Last modified: 10/08/2021                                                                                                   ###
###################################################################################################################################


# CONTENTS:
# A. PLOT EROSION
# B. EROSION IN ALL THE SAMPLES (MARSH AND TIDAL FLAT)
  #NMDS
  #GLM
# C. EROSION IN STABLE MARSH SAMPLES
  #NMDS
  #GLM
# D. CRACKS STABILITY

# Delete previous objects: removes every object in the current environment,
# including hidden ones whose names start with "." (all = TRUE).
# NOTE(review): this only deletes objects; it does not detach packages or reset
# options, and when run at top level it wipes the caller's workspace.
rm(list=ls(all=TRUE))	 

# read relevant libraries
library(tidyverse)      # for dplyr, ggplot2 etc
library(MASS)           # for stepAIC
library(vegan)          # for ordinations as NMDS
library(ggfortify)      # for plotting ordinations
library(psych)          # for the usefull pairs function
#install.packages("psych")


# Folder where the data csv files are stored; edit this path before running.
# The working directory is switched to it so the csv files can be read by name.
inputDirectory1 <- "C:/"
setwd(inputDirectory1)

# Sample-level variables; Treatment is stored as a factor.
data <- read.csv(file = "MarinDiaz et al data_variables.csv")
data[["Treatment"]] <- as.factor(data[["Treatment"]])



############
# A. PLOT EROSION

# Boxplot of the top erosion per treatment.
# schi6 and schi9 are left out: their erosion test was not good because water
# filtered through the side of the samples.
excluded_codes <- c("schi6", "schi9")

# Readable axis labels keyed by the raw Treatment levels.
treatment_labels <- c(
  "marsh fine grain" = "Silty established marsh",
  "mud_pioneer"      = "Silty pioneer marsh",
  "ntidal_flat_mud"  = "Silty tidal flat",
  "omarsh_sand"      = "Sandy established marsh",
  "sandy_pioneer"    = "Sandy pioneer marsh",
  "tidal_flat_sand"  = "Sandy tidal flat"
)

data %>%
  dplyr::filter(!code %in% excluded_codes) %>%
  ggplot(aes(x = Treatment, y = erosionNOdebris)) +
  # explicit ggplot2 aesthetic names instead of the base-graphics aliases col/bg
  geom_boxplot(aes(colour = Treatment, fill = Treatment)) +
  theme_classic(base_size = 15) +
  labs(x = "Treatment", y = "Mean top erosion (cm)") +
  scale_fill_manual(
    values = c("greenyellow", "deepskyblue2", "cyan3", "yellow1", "gold", "coral")
  ) +
  scale_color_manual(
    values = c("yellowgreen", "deepskyblue3", "cyan4", "yellow2", "orange", "coral2")
  ) +
  scale_x_discrete(labels = treatment_labels) +
  theme(
    legend.position = "right",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
  
############
# B. EROSION IN ALL THE SAMPLES (MARSH AND TIDAL FLAT)
############ 

  # Histogram of the erosion depth in 1-unit bins from 0 to 20; it shows the
  # near-binomial distribution (samples either erode or they do not).
  erosion_breaks <- seq(from = 0, to = 20, by = 1)
  ggplot(data, aes(x = erosionNOdebris)) +
    geom_histogram(
      breaks = erosion_breaks,
      col = "black",
      fill = "black",
      alpha = 0.5
    ) +
    labs(
      title = "Histogram for total erosion depth at the end of the experiment",
      x = "total erosion (cm)",
      y = "Count"
    )
  
  
  ################# Do an NMDS ordination to explore which factors are important
  # Keep the binary erosion outcome plus the soil and root variables, giving
  # the variables short names so the ordination labels stay readable.
  data2 <- data %>%
    dplyr::select(
      binom_erosion,
      Comp  = compaction,
      BulkD = BulkL1,
      SWC   = SWCL1,
      OC    = OCL1NoCoarseRoots,
      SD50  = L1SD50,
      Silt  = L1Silt,
      RD    = totalrootdensity,
      RhizD = rhizdensity,
      RD.C  = coarserootdensity,
      RD.F  = finerootdensity,
      BGB   = totalBGB
    )

  # OC including coarse roots is strongly correlated with OC without roots
  plot(OCL1NoCoarseRoots ~ OCL1YesCoarseRoots, data)

  # explore the correlation matrix of all variables (Spearman), outcome excluded
  nmds_input <- data2 %>% dplyr::select(-binom_erosion)
  pairs.panels(nmds_input, smooth = TRUE, ellipses = FALSE, stars = TRUE,
               method = "spearman", cex.labels = 1, cex.cor = 2)

  # many relations are non-linear and correlated, so better to use an NMDS than PCA
  # metaMDS starts from random configurations; fixing the seed makes the
  # ordination (and every figure built on it) reproducible between runs.
  set.seed(1)
  nmds <- vegan::metaMDS(nmds_input, trace = FALSE) # all variables minus binom_erosion
  nmds
  # Draw the ordination: point colour encodes the treatment, point symbol
  # encodes whether the sample eroded.
  cols <- c("yellowgreen", "deepskyblue2", "cyan4", "yellow2", "orange", "coral2")
  pchs <- c(1, 2)

  # labelled copy of the binary outcome, used only for symbols and the legend
  data$binom_erosion_f <- factor(data$binom_erosion)
  levels(data$binom_erosion_f) <- c("not eroded", "eroded")

  site_cols <- cols[factor(data$Treatment)]
  site_pchs <- pchs[data$binom_erosion_f]

  plot(nmds, type = "n")
  points(nmds, display = "sites", col = site_cols, pch = site_pchs,
         cex = 2, lwd = 3)
  text(nmds, display = "species", col = "black")
  text(nmds, display = "sites", col = site_cols, pos = 4, offset = 0.4)

  legend("topleft",
         legend = tools::toTitleCase(levels(data$Treatment)),
         pch = c(16, 16), col = cols, bty = "n", cex = c(1.2, 1.2))
  legend("bottomleft",
         legend = tools::toTitleCase(levels(data$binom_erosion_f)),
         pch = pchs, bty = "n")
  # binom_erosion and Treatment are not part of the ordination itself; they
  # are only used to label the points afterwards
  
  # Convert to long format: one row per sample and continuous predictor.
  # binom_erosion is carried along because the binomial smoother below needs a
  # response between 0 and 1; erosionNOdebris (erosion depth) is kept for
  # reference. binom_erosion is assumed to be coded 0/1, as in the logistic
  # models of this script -- confirm.
  predictor_vars <- c("compaction", "BulkL1", "SWCL1", "OCL1NoCoarseRoots",
                      "L1SD50", "L1Silt", "totalrootdensity", "rhizdensity",
                      "coarserootdensity", "finerootdensity", "totalBGB")
  data2 <- data %>%
    dplyr::select(dplyr::all_of(c("Treatment", "erosionNOdebris",
                                  "binom_erosion", predictor_vars)))
  data3 <- data2 %>%
    pivot_longer(
      cols = dplyr::all_of(predictor_vars),
      names_to = "varname",
      values_to = "predvar",
      values_drop_na = TRUE
    )

  ###### plot how the erosion probability is affected by each continuous variable
  # facet order for the panels
  facet_order <- c("compaction", "BulkL1", "SWCL1", "OCL1NoCoarseRoots",
                   "L1SD50", "L1Silt", "totalrootdensity", "coarserootdensity",
                   "finerootdensity", "rhizdensity", "totalBGB")
  data3$varname <- factor(data3$varname, levels = facet_order)

  # predictors that get a fitted logistic curve (compaction and rhizdensity
  # are left without one, as in the original selection)
  smooth_vars <- c("BulkL1", "SWCL1", "OCL1NoCoarseRoots", "L1SD50", "L1Silt",
                   "totalrootdensity", "coarserootdensity", "finerootdensity",
                   "totalBGB")

  data3 %>% ggplot(aes(x = predvar, y = binom_erosion)) +
    geom_point(size = 2, alpha = 0.7, aes(col = Treatment)) +
    labs(x = "predictor variable", y = "complete erosion probability") +
    scale_color_manual(values = c("yellowgreen", "deepskyblue3", "cyan4",
                                  "yellow2", "orange", "coral2")) +
    theme_classic(base_size = 14) +
    annotate("segment", x = -Inf, xend = Inf, y = -Inf, yend = -Inf) + # line along the X axis
    annotate("segment", x = -Inf, xend = -Inf, y = -Inf, yend = Inf) + # line along the Y axis
    geom_smooth(method = "glm", method.args = list(family = "binomial"),
                se = FALSE, col = "black", lty = "dotted",
                data = subset(data3, varname %in% smooth_vars)) +
    facet_wrap(~varname, scales = "free_x") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          strip.background = element_rect(colour = "white", fill = "white")) # removes the box around the panel titles
  # note that the glm warning is not important: it just means fitted values
  # very close to zero
  # note that the warning is not important: just means values very close to zero
  
  
  # Model the relation for a single variable; the resulting deviance and
  # significance may be added to the figure or to a table.
  m1 <- glm(binom_erosion ~ finerootdensity, # logistic regression for this relation
            family = binomial(link = logit), data = data)
  m1

  summary(m1) # coefficients
  # Analysis of variance for individual terms. The function is called through
  # its namespace because the car package is not attached yet at this point.
  car::Anova(m1, type = "II", test.statistic = "Wald")

  # check if there is overdispersion: if the value is more than 1.5 it is overdispersed
  summary(m1)$deviance / summary(m1)$df.residual

  # Alternative fits of single-variable models; each assignment overwrites m1.
  library(arm) # provides bayesglm
  m1 <- bayesglm(binom_erosion ~ finerootdensity, # Bayesian logistic regression
                 family = binomial(link = logit), data = data)
  m1 <- glm(binom_erosion ~ coarserootdensity, # quasibinomial logistic regression
            family = quasibinomial(link = logit), data = data)
  
  # Model what is the best minimal set of predictors using stepwise logistic
  # regression (glm).
  # NOTE(review): the candidate set below is assumed to be the continuous soil
  # and root variables explored earlier in this script -- confirm against the
  # manuscript.
  step_vars <- c("compaction", "BulkL1", "SWCL1", "OCL1NoCoarseRoots",
                 "L1SD50", "L1Silt", "totalrootdensity", "rhizdensity",
                 "coarserootdensity", "finerootdensity", "totalBGB")
  # step() requires every candidate model to be fitted on the same rows, so
  # rows with a missing value in any candidate variable are dropped first.
  data_step <- na.omit(data[, c("binom_erosion", step_vars)])

  # intercept-only starting model and the full model that bounds the search
  model.null <- glm(binom_erosion ~ 1,
                    family = binomial(link = logit), data = data_step)
  model.full <- glm(reformulate(step_vars, response = "binom_erosion"),
                    family = binomial(link = logit), data = data_step)

  step(model.null,
       scope = list(lower = ~1, upper = formula(model.full)),
       direction = "both",
       test = "Chisq")

  # model with the variables retained by the stepwise selection
  m2 <- glm(binom_erosion ~ totalBGB + L1SD50,
            family = binomial(link = logit), data = data)

  # test if the interaction is significant (it is not)
  #m2<-glm(binom_erosion~totalBGB*L1SD50, #var selected by stepwise
          #family = binomial(link=logit),data=data)
  m2
  summary(m2) # coefficients
  #install.packages("car")
  library(car)
  Anova(m2, type = "II", test.statistic = "Wald") # analysis of variance for individual terms (car)

  #install.packages("rcompanion")
  library(rcompanion)
  nagelkerke(m2) # pseudo R squared

  # check if there is overdispersion: if the value is more than 1.5 it is
  # overdispersed (the authors noted 0.2 here, which is ok)
  summary(m2)$deviance / summary(m2)$df.residual


############
# C. EROSION IN STABLE MARSH SAMPLES
############
  # Drop rows 32 and 35 by position.
  # NOTE(review): presumably these are the schi6/schi9 samples excluded from
  # the erosion boxplot -- confirm against the csv.
  data <- data[-c(32, 35), ]

  # Relatively stable samples only (erosion below 5), with the variables used
  # in this section. The top-layer description is renamed to `cracks` and
  # recoded to a two-level label.
  data2stable <- data %>%
    dplyr::filter(erosionNOdebris < 5) %>%
    dplyr::select(
      "Treatment", "zone", "erosionNOdebris", "compaction",
      cracks = "detritus.lose.layer",
      "BulkL1", "SWCL1", "OCL1NoCoarseRoots", "L1SD50", "L1Silt",
      "totalrootdensity", "rhizdensity", "coarserootdensity",
      "finerootdensity", "totalBGB", "naturalcracks"
    ) %>%
    dplyr::mutate(
      cracks = ifelse(cracks == "natural cracks", " with cracks", " no cracks")
    )

  # rebuild the factor so treatments without stable samples are no longer levels
  data2stable$Treatment <- factor(data2stable$Treatment)

  # explore the correlation matrix of all variables
  pairs.panels(data2stable, smooth = TRUE, ellipses = FALSE, stars = TRUE,
               method = "spearman", cex.labels = 1, cex.cor = 2)
  
  
  # Short variable names so the ordination and pairs-plot labels stay readable.
  data2nmds <- data2stable %>%
    dplyr::rename(
      "Comp"    = compaction,
      "BulkD"   = BulkL1,
      "SWC"     = SWCL1,
      "OC"      = OCL1NoCoarseRoots,
      "SD50"    = L1SD50,
      "Silt"    = L1Silt,
      "RD"      = totalrootdensity,
      "RhizD"   = rhizdensity,
      "RD.C"    = coarserootdensity,
      "RD.F"    = finerootdensity,
      "BGB"     = totalBGB,
      "erosion" = erosionNOdebris
    )

  # Spearman correlation matrix of the continuous variables plus erosion
  pairs.panels(
    data2nmds %>% dplyr::select("Comp", "BulkD", "SWC", "OC", "SD50", "Silt",
                                "RD", "RhizD", "RD.C", "RD.F", "BGB", "erosion"),
    smooth = TRUE, ellipses = FALSE, stars = TRUE, method = "spearman",
    cex.labels = 1, cex.cor = 2
  )

  # many relations are non-linear and correlated, so better to use an NMDS than PCA
  # The ordination uses the continuous variables only: erosion and the
  # categorical descriptors are removed first.
  nmds_stable_input <- data2nmds %>%
    dplyr::select(-erosion, -Treatment, -zone, -cracks, -naturalcracks)

  # metaMDS starts from random configurations; fixing the seed makes the
  # ordination reproducible between runs.
  set.seed(1)
  nmds_stable <- vegan::metaMDS(nmds_stable_input, trace = FALSE, trymax = 100)

  nmds_stable
 
  # Draw the stable-sample ordination: colour encodes the treatment and the
  # symbol size grows with the square root of the erosion depth.
  cols <- c("yellowgreen", "deepskyblue2", "yellow2")
  site_sizes <- 1 + 2 * sqrt(data2stable$erosionNOdebris)

  plot(nmds_stable, type = "n")
  points(nmds_stable, display = "sites",
         cex = site_sizes,
         col = cols[data2stable$Treatment], lwd = 3)
  text(nmds_stable, display = "species", col = "black")
  # label the sites with their zone
  orditorp(nmds_stable, display = "sites", labels = data2stable$zone,
           col = "grey", pos = 4)
  legend("bottomright",
         legend = tools::toTitleCase(levels(data2stable$Treatment)),
         pch = 16, bty = "n", col = cols)

  # distinct erosion values present among the stable samples
  unique(data2stable$erosionNOdebris)


  # Long format for the continuous variables: one row per stable sample and
  # predictor, dropping missing predictor values.
  data3stable <- pivot_longer(
    data2stable,
    cols = c("compaction", "BulkL1", "SWCL1", "OCL1NoCoarseRoots",
             "L1SD50", "L1Silt", "totalrootdensity", "rhizdensity",
             "coarserootdensity", "finerootdensity", "totalBGB"),
    names_to = "varname",
    values_to = "predvar",
    values_drop_na = TRUE
  )

  ###### plot how erosion relates to each continuous variable
  # fix the level order so facet_wrap lays the panels out in this sequence
  stable_facet_order <- c("compaction", "BulkL1", "SWCL1", "OCL1NoCoarseRoots",
                          "L1SD50", "L1Silt", "totalrootdensity",
                          "coarserootdensity", "finerootdensity", "rhizdensity",
                          "totalBGB")
  data3stable$varname <- factor(data3stable$varname, levels = stable_facet_order)

  # only these panels get a fitted Gamma curve
  stable_smooth_vars <- c("compaction", "totalrootdensity", "finerootdensity",
                          "totalBGB")

  # NOTE(review): y is erosionNOdebris, yet the axis label reads
  # "complete erosion probability" -- confirm the intended label.
  ggplot(data3stable, aes(x = predvar, y = erosionNOdebris)) +
    geom_point(size = 2, alpha = 0.6,
               aes(col = Treatment, shape = naturalcracks)) +
    labs(x = "predictor variable", y = "complete erosion probability") +
    scale_color_manual(values = c("yellowgreen", "deepskyblue2", "yellow2")) +
    theme_classic(base_size = 14) +
    geom_smooth(method = "glm", se = FALSE,
                method.args = list(family = "Gamma"),
                col = "black", lty = "dotted",
                data = subset(data3stable, varname %in% stable_smooth_vars)) +
    facet_wrap(~varname, scales = "free") +
    #facet_wrap(~varname,scale="free_x")+
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          strip.background = element_rect(colour = "white", fill = "white")) # removes the box around the panel titles
  
  
  ## Single-variable analysis
  # Fit and test one predictor at a time (repeat for the other variables).
  # An exponential distribution of the data is assumed (a form of the gamma
  # distribution), hence a Gamma regression with log link.
  m1 <- glm(
    formula = erosionNOdebris ~ L1SD50,
    family = Gamma(link = "log"),
    data = data2stable
  )
  m1
  ## approach taken from: https://rcompanion.org/rcompanion/e_06.html
  summary(m1) # coefficients
  Anova(m1, type = "II", test = "Wald") # analysis of variance for individual terms (car)

  # overdispersion check: a ratio above 1.5 indicates overdispersion
  deviance(m1) / df.residual(m1)
  

  #### Long format for the categorical variables (grazing, top soil type, ...)
  # relatively stable samples only (erosion below 5)
  data4stable <- dplyr::filter(data, erosionNOdebris < 5)

  data5 <- data4stable %>%
    mutate(
      # missing Orchestia records become 0, everything else 1
      OrchestiaPresence = factor(ifelse(is.na(OrchestiaPresence), 0, 1)),
      # every grazing type other than "ungraz" is lumped into "grazed"
      grazing = ifelse(grazing == "ungraz", "ungrazed", "grazed")
    ) %>%
    dplyr::select("erosionNOdebris", "Treatment", "zone", "grazing",
                  "OrchestiaPresence", "detritus.lose.layer") %>%
    pivot_longer(
      cols = c("Treatment", "zone", "grazing", "OrchestiaPresence",
               "detritus.lose.layer"),
      names_to = "varname",
      values_to = "predvar",
      values_drop_na = FALSE
    )

  # number of samples per variable, category and erosion value
  data4mean <- data5 %>%
    group_by(varname, predvar, erosionNOdebris) %>%
    summarise(count = n())

  # NOTE(review): the boxplots are drawn from this count table (one row per
  # distinct erosion value), not from the raw samples, and ylim(0, 1) removes
  # values outside that range -- confirm this is intended.
  ggplot(data4mean, aes(x = predvar, y = erosionNOdebris)) +
    geom_boxplot() +
    ylim(0, 1) +
    geom_text(data = data4mean,
              aes(x = predvar, y = erosionNOdebris + 1, label = count)) +
    labs(x = "predictor variable", y = "complete erosion probability") +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.5)) +
    facet_wrap(~varname, scales = "free") +
    geom_text(data = data4mean,
              aes(x = predvar, y = 1, label = count),
              colour = "black", inherit.aes = FALSE, parse = FALSE)
  
  # Grazing is only tested in the mature marshes (the pioneer zone is not
  # grazed): keep the two marsh treatments and leave out the tidal flats.
  is_mature <- data$Treatment == "marsh fine grain" |
    data$Treatment == "omarsh_sand"
  mature <- data[is_mature, ]

  # erosion compared between grazing categories, followed by pairwise
  # Wilcoxon tests with Benjamini-Hochberg adjusted p-values
  kruskal.test(erosionNOdebris ~ grazing, data = mature)
  pairwise.wilcox.test(x = mature$erosionNOdebris, g = mature$grazing,
                       p.adjust.method = "BH")
  
  
###
# D. CRACKS STABILITY
###
  # Load the crack stability data; this replaces the erosion data held in `data`.
  data <- read.csv("MarinDiaz et al cracks_stability.csv")

  ### ARTIFICIAL CRACKS
  # artificial cracks were made in samples with a range of clay layer depths.
  # there were 2 types of cracks: 4 cm deep and 8 cm deep

  data$crackdepth <- as.factor(data$crackdepth)
  data$crackwidth <- as.factor(data$crackwidth)
  data$collapse <- as.factor(data$collapse)

  #install.packages("arm")
  library(arm) # provides bayesglm, used as the smoothing method below

  # plot collapse against the ratio of cohesive layer depth to crack depth
  # NOTE(review): collapse has just been converted to a factor; check that the
  # binomial smoother still receives a response it can fit.
  ggplot(aes(x = ratio_claydepth_crackdepth, y = collapse), data = data) +
    theme_classic(base_size = 14) +
    # alpha is a fixed setting, so it belongs outside aes(); inside aes() it
    # is mapped through a scale and adds a meaningless legend
    geom_point(size = 4, alpha = 0.5) +
    scale_color_manual(values = c("black")) +
    labs(x = "Ratio cohesive layer depth to crack depth",
         y = "Probability of soil collapse") +
    geom_smooth(method = "bayesglm",
                method.args = list(family = "binomial"),
                se = FALSE, col = "grey", lty = "dotted")
  

  ## model
  # Bayesian logistic regression of collapse on the ratio of cohesive layer
  # depth to crack depth. It is fitted on the cracks data held in `data`;
  # `data2` is the erosion table and has no collapse column.
  m1 <- bayesglm(collapse ~ ratio_claydepth_crackdepth,
                 family = binomial(link = logit), data = data)
  display(m1)
  summary(m1)
  car::Anova(m1) # analysis of variance for individual terms

  # check if there is overdispersion: if the value is more than 1.5 it is
  # overdispersed (in that case try quasibinomial or negative binomial)
  summary(m1)$deviance / summary(m1)$df.residual
  
  
  

